# Descriptive statistics and basic plots for the Old Faithful data set,
# followed by a bar plot of M&M color counts.
#
# Side effects: downloads a CSV file, attaches `faith` to the search path,
# draws several plots, prints summaries, and installs the stargazer and
# pastecs packages when they are not already installed.

# Reading in an external CSV (comma-separated values) file ----
faith <- read.csv(
  "https://webpages.uidaho.edu/~renaes/Data/faithdata.csv",
  header = TRUE
)
# or use the "Import dataset" option in the Environment window
# (upper right window in RStudio)

# see the first 6 observations
head(faith)
# or just type in the dataset name in the console to see the whole dataset
# or use View() to see the dataset in a spreadsheet type format
# (note the capital V)
if (interactive()) {
  # skip the viewer when the script is run non-interactively (e.g. Rscript)
  View(faith)
}

# Accessing variables inside a data frame ----
# In a fresh session this is an error (object 'duration' not found);
# try() reports the error but lets the rest of the script keep running.
try(hist(duration))
# the variable is an object inside the faith dataset so to access just one
# variable you need to either use a two-level name
# (datasetname$variablename) such as faith$duration
# or use attach(datasetname) to access the variables without the
# two-level name
faith$duration
# NOTE: attach() is convenient for interactive exploration, but it copies the
# columns onto the search path, so the attached copy does not follow later
# changes to `faith` and can mask other objects. Prefer faith$duration or
# with(faith, ...) in reusable code.
attach(faith)
duration

# Plots ----
hist(duration)
hist(wait)
boxplot(duration)
# put a title in since boxplot() does not have a title by default
boxplot(duration, main = "Eruptions")
# or use title() after boxplot()
boxplot(duration, horizontal = TRUE)
title("Eruptions")

# scatterplot x = duration, y = wait
plot(duration, wait, main = "Old Faithful")

# Summary statistics ----
# individual commands
mean(duration)
mean(wait)
var(duration)
var(wait)
sd(duration)
sd(wait)
median(duration)
median(wait)
max(duration)
max(wait)
min(duration)
min(wait)
length(duration) # gives the sample size

# the mode needs the modeest package
# (but will actually need the statip package loaded)
# install.packages("modeest")
library(statip)
mfv(duration)
mfv(wait)

# using summary() but only gives mean, min, max, median, q1, q3
summary(faith)
# or use summary(duration); summary(wait)

# Using a package in R ----
# packages install functions that are not in the base version

# using stargazer package
# only install one time (not every time you need it): the check below
# installs the package only when it is not already available
if (!requireNamespace("stargazer", quietly = TRUE)) {
  install.packages("stargazer")
}
# once it is installed, load the package using library()
library(stargazer)
stargazer(faith, type = "text")

# there are more...
# pastecs pkg, stat.desc
# install only once (same check as above)
if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
library(pastecs)
stat.desc(faith)

# Bar plot of categorical counts ----
# each bar is labelled with, and filled by, its color name
colors <- c("Red", "Blue", "Green", "Orange", "Yellow", "Brown")
observed <- c(92, 157, 102, 190, 91, 101)
barplot(
  observed,
  names.arg = colors,
  col = colors,
  ylim = c(0, 200),
  ylab = "Counts",
  main = "Distribution of M&M Colors"
)